/*
Create year files for all of Brazil, to be used for estimating PageRank values
	// input: clean data files 2009-2016, e.g., RR2009_RAIS,...,RR2016_RAIS
	// output: year-specific master file, e.g., cleanYYYY

*/
		
cap log close
log using "$logs/prep_rais_ranks", replace

* Years to process and the average Brazil CPI for each year (base year 2015 = 100).
* The two lists are paired by position, so they must have the same length.
local years "2009 2010 2011 2012 2013 2014 2015 2016"
local cpi_values "68.79392762 72.26026576 77.05572389 81.21947107 86.25859992 91.71803801 100 108.7391289"
local n_years : word count `years'
local n_cpi : word count `cpi_values'
if `n_years' != `n_cpi' {
	display as error "year list (`n_years' entries) and CPI list (`n_cpi' entries) differ in length"
	log close
	exit 198
}

* States stacked into each year file (the list does not vary by year)
local states "RR AC AP TO PI SE AL RO PB AM RN MA MS MT ES PA DF CE GO PE BA SC RS PR MG RJ SP"
local n_states : word count `states'

* Loop over years
forvalues y = 1/`n_years' {
	local yr : word `y' of `years'
	local cpi_yr : word `y' of `cpi_values'
	//create a year-specific folder (capture: it may already exist)
	cap mkdir "$files/`yr'"

	* Loop over states
	forvalues s = 1/`n_states' {
		local uf : word `s' of `states'

		* Load RAIS
		use "$files/rais/`uf'/`uf'`yr'_RAIS", clear

		* Initial restrictions
		//workers on open-ended contracts, working full-time, and earning monthly wages
		//NOTE: Stata treats missing as larger than any number, so a bare
		//contracthours>=30 would also keep observations with missing hours;
		//those are excluded explicitly here
		keep if (wagetype==1) & (contracthours>=30) & !missing(contracthours) ///
		 & inrange(emptype, 10, 25)
		//private sector establishments
		drop if (cnae20subcl==6410700) | (concla==2208) | (cnae20subcl<0)
		keep if (concla>=2000) & (concla<4000)
		drop if (concla<=2020)

		* Modify variables
		gen year = `yr'
		gen cpi = `cpi_yr'
		rename (estabid firmidcnpj pis education cnae20subcl municipality ///
		 contracthours cbo02 sepmonth hiremonth remunavgminw remunavgnom ///
		 remundecminw remundecnom contractwage) ///
		 (empid_est empid_firm persid edu ind07_7 muni ///
		 hours occ02_6 sep_month hire_month earn_mean_mw earn_mean ///
		 earn_dec_mw earn_dec earn_contract)
		//5-digit industry: drop the last two digits of the 7-digit code
		gen ind07_5 = floor(ind07_7/100)

		//experience with original education variables (in months):
		//map education category -> years of schooling, then age - schooling - 6,
		//floored at zero and converted from years to months
		recode edu (1=0) (2=2) (3=4) (4=6) (5=8) (6=9) (7=11) (8=13) ///
		 (9=15) (10=17) (11=19) (else=.), generate(schooling)
		gen exp_act = age - schooling - 6
		replace exp_act = 0 if exp_act<0
		drop schooling
		replace exp_act = exp_act*12

		//hours worked: months covered in the year run from hire_month to sep_month,
		//where a 0 hire month is treated as month 1 and a 0 separation month as month 12
		gen months = cond(sep_month==0, 12, sep_month) ///
		 - cond(hire_month==0, 1, hire_month) + 1
		gen hours_year = hours*4.34524*months
		drop months

		//recoding race to consecutive codes 1-5; any other value becomes missing
		recode race (1=1) (2=2) (4=3) (6=4) (8=5) (else=.)

		//real earnings: deflate nominal values by the year's CPI (2015 = 100)
		foreach v of varlist earn_mean earn_dec earn_contract {
			replace `v' = 100*(`v'/`cpi_yr')
		}

		//firm age (placeholder)
		gen age_est = 1
		gen age_firm = 1

		keep year empid_est empid_firm persid edu ind07_5 muni ///
		 hours occ02_6 sep_month hire_month earn_mean_mw earn_mean ///
		 earn_dec_mw earn_dec earn_contract gender age tenure race ///
		 hours_year exp_act cpi age_est age_firm dob
		//detach value labels from all kept variables
		label val *

		//stack onto the year file built so far; the first state starts a fresh file.
		//The current state is kept in front of the earlier ones, so the row order
		//(and hence id_unique below) is the reverse of the state list.
		if `s' > 1 {
			append using "$files/`yr'/clean`yr'"
		}
		compress
		save "$files/`yr'/clean`yr'", replace
	}

	* Year-level variables on the stacked file
	//row identifier within the year file
	gen double id_unique = _n
	//two-group education indicator: edu 1-6 -> 1, edu 7-11 -> 2, otherwise missing
	gen hschool = 1 if (edu>=1)&(edu<=6)
	replace hschool = 2 if (edu>=7)&(edu<=11)
	//report missing values (mdesc is not a built-in command; it must be installed)
	mdesc
	compress
	save "$files/`yr'/clean`yr'", replace

}

log close
